Imports text_coordinates2.DynaPDF
Imports System.Collections.Generic

Module Module1
   ' One text fragment inside a TTextRec (see TTextRec.Text).
   ' NOTE(review): nothing in this file fills or reads these fields; the meanings below are
   ' inferred from the names only -- confirm against the code that builds the records.
   Public Structure TText
      ' Presumably the advance applied for this fragment -- TODO confirm units and sign.
      Public Advance As Single
      ' Presumably the width of a space character in the active font -- TODO confirm.
      Public SpaceWidth As Single
      ' The fragment's text.
      Public Text As String
      ' Presumably the width of Text -- TODO confirm coordinate space.
      Public Width As Single
   End Structure

   ' One text record of a page. Main collects these in a List(Of TTextRec) while a page is parsed;
   ' per the note in Main, a record carries the start coordinates, text length, and font size.
   Public Structure TTextRec
      ' The fragments that make up this record.
      Public Text() As TText
      ' Start coordinates of the record -- NOTE(review): coordinate space is not visible here, confirm.
      Public X As Single
      Public Y As Single
      ' Font size used for the record.
      Public FontSize As Single
      ' Presumably the width of a space character at this font size -- TODO confirm.
      Public SpaceWidth As Single
      ' Presumably the overall text length (width) of the record -- TODO confirm.
      Public Width As Single
   End Structure

   ' Error callback registered with the PDF instance in Main.
   ' The message arrives as a pointer to an ANSI string; it is converted and written to the console.
   ' To keep the function name out of the error message, set the flag emNoFuncNames
   ' (pdf.SetErrorMode(TErrMode.emNoFuncNames)).
   ' Returns 0 so that processing tries to continue after an error occurs; any other
   ' return value breaks processing.
   Private Function PDFError(ByVal Data As IntPtr, ByVal ErrCode As Integer, ByVal ErrMessage As IntPtr, ByVal ErrType As Integer) As Integer
      Dim messageText As String = System.Runtime.InteropServices.Marshal.PtrToStringAnsi(ErrMessage)
      Console.WriteLine("{0}", messageText)
      Return 0
   End Function

   ' Shared state used by the parser callbacks in this module; both fields are created in Main.
   ' CTextCoordinates is the class that extracts the text from a PDF page; the callbacks forward
   ' the template, matrix, graphics-state, font, and text events to this instance.
   Private m_TextCoords As CTextCoordinates
   ' Receives the text records of the page being parsed (passed to m_TextCoords.AddText);
   ' Main clears it before each page.
   Private m_TextArray As List(Of TTextRec)

   ' BeginTemplate callback of the parse interface.
   ' Hands the template's bounding box and matrix to m_TextCoords and returns its result unchanged.
   ' Data, Obj and Handle are not used.
   Private Function parseBeginTemplate(ByVal Data As IntPtr, ByVal Obj As IntPtr, ByVal Handle As Integer, ByRef BBox As TPDFRect, ByVal Matrix As IntPtr) As Integer
      Dim status As Integer = m_TextCoords.BeginTemplate(BBox, Matrix)
      Return status
   End Function

   ' MulMatrix callback of the parse interface.
   ' Forwards the matrix M to m_TextCoords; Data and Obj are not used.
   Private Sub parseMulMatrix(ByVal Data As IntPtr, ByVal Obj As IntPtr, ByRef M As TCTM)
      Call m_TextCoords.MulMatrix(M)
   End Sub

   ' RestoreGraphicState callback of the parse interface.
   ' Tells m_TextCoords to restore its graphics state and always reports 0 to the parser,
   ' regardless of what RestoreGState itself returns. Data is not used.
   Private Function parseRestoreGraphicState(ByVal Data As IntPtr) As Integer
      Call m_TextCoords.RestoreGState()
      parseRestoreGraphicState = 0
   End Function

   ' SaveGraphicState callback of the parse interface.
   ' Tells m_TextCoords to save its graphics state and returns its result unchanged. Data is not used.
   Private Function parseSaveGraphicState(ByVal Data As IntPtr) As Integer
      Dim status As Integer = m_TextCoords.SaveGState()
      Return status
   End Function

   ' SetFont callback of the parse interface.
   ' Only the font size, font type and font handle (IFont) are passed on to m_TextCoords;
   ' Data, Obj, Embedded, FontName and Style are not used.
   Private Sub parseSetFont(ByVal Data As IntPtr, ByVal Obj As IntPtr, ByVal Type As TFontType, ByVal Embedded As Integer, ByVal FontName As IntPtr, ByVal Style As TFStyle, ByVal FontSize As Double, ByVal IFont As IntPtr)
      Call m_TextCoords.SetFont(FontSize, Type, IFont)
   End Sub

   ' ShowTextArrayW callback of the parse interface.
   ' Passes the text records (Source, Kerning, Count, Width), the text matrix and the decoded flag
   ' to m_TextCoords.AddText, which appends to m_TextArray, and returns AddText's result.
   ' Decoded is an integer flag from the parser; any non-zero value is treated as True.
   ' If AddText throws, the exception message is written to the console and -1 is returned
   ' (presumably this makes the parser stop -- confirm against the ParseContent documentation).
   ' Data is not used.
   Private Function parseShowTextArrayW(ByVal Data As IntPtr, ByVal Source() As TTextRecordA, ByRef Matrix As TCTM, ByVal Kerning() As TTextRecordW, ByVal Count As Integer, ByVal Width As Double, ByVal Decoded As Integer) As Integer
      Try
         Return m_TextCoords.AddText(m_TextArray, Matrix, Source, Kerning, Count, Width, Decoded <> 0)
      Catch ex As Exception
         ' Do not swallow the failure silently: without this line the only visible symptom
         ' would be a failed parse with no indication of the cause.
         Console.WriteLine("parseShowTextArrayW: {0}", ex.Message)
         Return -1
      End Try
   End Function

   ' Program entry point.
   ' Imports dynapdf_help.pdf into an in-memory PDF, flattens markup annotations and form fields,
   ' and then parses every page so that m_TextArray holds the text records of the page.
   ' Any exception is written to the console, after which the program waits for a key press.
   Sub Main()
      Try
         Dim pdf As New CPDF()
         m_TextCoords = New CTextCoordinates(pdf)
         m_TextArray = New List(Of TTextRec)(1024)

         ' Errors are reported through a callback function; events could be used instead.
         pdf.SetOnErrorProc(AddressOf PDFError)
         ' No output file: this example does not create a PDF file.
         pdf.CreateNewPDF(Nothing)

         ' External CMaps should always be loaded when processing text from PDF files.
         ' See the description of ParseContent() for further information.
         pdf.SetCMapDir("../../../../../Resource/CMap/", TLoadCMapFlags.lcmRecursive Or TLoadCMapFlags.lcmDelayed)

         ' Import pages as pages; this avoids the conversion of pages to templates.
         pdf.SetImportFlags(TImportFlags.ifImportAll Or TImportFlags.ifImportAsPage)
         Dim openResult As Integer = pdf.OpenImportFile("../../../../../dynapdf_help.pdf", TPwdType.ptOpen, Nothing)
         If openResult < 0 Then
            Console.Write("Input file ""../../../../../dynapdf_help.pdf"" not found!" & Chr(10))
            Console.Read()
            Return
         End If
         pdf.ImportPDFFile(1, 1.0, 1.0)
         pdf.CloseImportFile()

         ' Flatten markup annotations and form fields so that the text of these objects can be extracted too.
         pdf.FlattenAnnots(TAnnotFlattenFlags.affMarkupAnnots)
         pdf.FlattenForm()

         ' Wire the callbacks of this module into the parse interface.
         Dim parser As New TPDFParseInterface
         With parser
            .BeginTemplate = AddressOf parseBeginTemplate
            .MulMatrix = AddressOf parseMulMatrix
            .RestoreGraphicState = AddressOf parseRestoreGraphicState
            .SaveGraphicState = AddressOf parseSaveGraphicState
            .SetFont = AddressOf parseSetFont
            .ShowTextArrayW = AddressOf parseShowTextArrayW
         End With

         Dim pageCount As Integer = pdf.GetPageCount()
         For pageNum As Integer = 1 To pageCount
            pdf.EditPage(pageNum)
            ' Start every page with a fresh tracker state and an empty record list.
            m_TextCoords.Init()
            m_TextArray.Clear()
            ' The text of the entire page is stored in the list m_TextArray.
            pdf.ParseContent(parser, TParseFlags.pfNone)
            ' m_TextArray now contains all text records of the page, including the
            ' start coordinates, text length, and font sizes.

            pdf.EndPage()
         Next pageNum
      Catch ex As Exception
         Console.Write(ex.Message & Chr(10))
         Console.Read()
      End Try
   End Sub

End Module
